library(sasld) # Tabella 11.1 x <- c(26,233,164,117,473,293,172,383,132) tbl <- matrix(x,nrow=3,byrow=TRUE) rownames(tbl) <- c("Sopra la media","In media","Sotto la media") colnames(tbl) <- c("Non troppo felice","Abbastanza felice","Molto felice") tbl ptbl <- prop.table(tbl,margin=1) round(ptbl*100,1) # Tabella 11.6 cq <- chisq.test(tbl) fatt <- cq$expected round(fatt,2) cq cont <- (tbl - fatt)^2/fatt round(cont,3) sum(cont) # ------------------------------------------------------------------- # 11.2 -------------------------------------------------------------- # Tabella 11.9 x <- c(347,11188,327,13708) tbl <- matrix(x,nrow=2,byrow=TRUE) rownames(tbl) <- c("placebo","aspirina") colnames(tbl) <- c("ś","no") ptbl <- prop.table(tbl,margin=1) tbl round(ptbl*100,1) cq <- chisq.test(tbl,correct=FALSE) cq prop.test(c(347,327),c(11535,14035),correct=FALSE) p1 <- 347/11535 p2 <- 327/14035 p0 <- (347+327)/(11535+14035) var0 <- p0*(1-p0)/11535 + p0*(1-p0)/14035 se0 <- sqrt(var0) c(p1,p2,se0) z <- (p1 - p2)/se0 z c(z^2,cq$statistic) # ------------------------------------------------------------------- # 11.3 -------------------------------------------------------------- nrep <- 10000 dimc <- 100 # da sostituire con 200 e 10000 set.seed(123456) fem.si <- rbinom(n=nrep,size=dimc,prob=0.51) mas.si <- rbinom(n=nrep,size=dimc,prob=0.49) fem.no <- dimc - fem.si mas.no <- dimc - mas.si X <- cbind(fem.si,fem.no,mas.si,mas.no) head(X) colMeans(X) X2 <- numeric(nrep) pval <- numeric(nrep) for (i in 1:nrep) { tbl <- c(fem.si[i],fem.no[i],mas.si[i],mas.no[i]) tbl <- matrix(tbl,nrow=2,byrow=TRUE) cq <- chisq.test(tbl) X2[i] <- cq$statistic pval[i] <- cq$p.value } table(pval <= 0.05) mean(X2) table(pval <= 0.005) table(pval <= 0.001) qchisq(c(0.05,0.005,0.001),1,lower.tail=FALSE) hist(log10(pval)) # ------------------------------------------------------------------- # 11.4 -------------------------------------------------------------- # Tabella 11.17 x <- c(241,499,208,131,133,344,258,186) tbl <- matrix(x,nrow=2,byrow=TRUE) rownames(tbl) <- c("Donne","Uomini") colnames(tbl) <- c("Molto","Moderatamente","Un po'","Per nulla") tbl cq <- chisq.test(tbl) fatt <- cq$expected round(fatt,1) rst <- cq$stdres round(rst,3) # ------------------------------------------------------------------- # 11.5 -------------------------------------------------------------- # Tabella 11.18 x <- c(3,1,1,3) tbl <- matrix(x,nrow=2,byrow=TRUE) rownames(tbl) <- c("Latte","The") colnames(tbl) <- c("Latte","The") tbl chisq.test(tbl,correct=FALSE) fisher.test(tbl) fisher.test(tbl,alternative="greater") dhyper(3,4,4,4) dhyper(c(0:4),4,4,4) # grandi campioni x <- c(347,11188,327,13708) tbl <- matrix(x,nrow=2,byrow=TRUE) fisher.test(tbl) chisq.test(tbl,correct=FALSE) x <- c(0:674) p <- dhyper(x,11535,14035,674) sum(p) plot(x,p,type = "h",xlab="Valore nella prima cella",ylab="Probabilità") points(347,0,pch=19,col="red",cex=1.5) # Figura a pag. 635 plot(x[250:360],p[250:360],type = "h",xlab="Valore nella prima cella",ylab="Probabilità") points(347,0,pch=19,col="black",cex=1.5) sum(p[348:675]) fisher.test(tbl,alternative="greater") # media della distribuzione m <- sum(x*p) # varianza EX2 <- sum(x^2*p) v <- EX2 - m^2 c(m,sqrt(v)) # media e deviazione standard (347-m)/sqrt(v) # test z p1 <- 347/11535 p2 <- 327/14035 p0 <- (347+327)/(11535+14035) var0 <- p0*(1-p0)/11535 + p0*(1-p0)/14035 se0 <- sqrt(var0) c(p1,p2,se0) z <- (p1 - p2)/se0 z # RR e OR x <- c(347,11188,327,13708) tbl <- matrix(x,nrow=2,byrow=TRUE) tbl p1 <- 347/11535 p2 <- 327/14035 p1/p2 o1 <- 347/11188 o2 <- 327/13708 o1/o2 c(p1,o1,p2,o2) # -------------------------------------------------------------------